import torchaudio

# 👇 手动加载音频文件
def load_audio(file_path: str, target_sr: int = 16000):
    """Load an audio file as a mono, 1-D NumPy array sampled at ``target_sr``.

    Args:
        file_path: Path of the audio file, handed to ``torchaudio.load``.
        target_sr: Sample rate in Hz of the returned signal. Resampling is
            performed only when the file's own rate differs from it.

    Returns:
        A 1-D NumPy array of shape ``(T,)`` containing the mono waveform.
    """
    waveform, sample_rate = torchaudio.load(file_path)

    # Down-mix multi-channel audio by averaging over the channel axis;
    # keepdim=True preserves the (1, T) layout for the steps below.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to the requested rate only when the file's rate differs.
    if sample_rate != target_sr:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sr)
        waveform = resampler(waveform)

    # Drop only the channel axis. A bare squeeze() would also collapse a
    # one-sample signal into a 0-d array and break the (T,) contract.
    return waveform.squeeze(0).numpy()
